The most recent update of this html document occurred: Thu Dec 22 10:09:59 2016

> library(knitr)
> library(ggplot2)
> library(reshape)
> library(DESeq2)
> library(CHBUtils)
> library(limma)
> library(gtools)
> library(gridExtra)
> library(devtools)
> library(dplyr)
> library(tidyr)
> library(pheatmap)
> library(rio)
> library(biomaRt)
> dselect = dplyr::select
> library(EnsDb.Hsapiens.v79)
> get_human = function(ids) {
+     names(ids)[1] = "external_gene_name"
+     ensembl = useMart("ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl", 
+         host = "useast.ensembl.org")
+     annot = getBM(attributes = c("hsapiens_homolog_ensembl_gene", "external_gene_name"), 
+         filters = c("external_gene_name"), values = ids$external_gene_name, 
+         mart = ensembl)
+     left_join(annot, ids, by = "external_gene_name") %>% filter(hsapiens_homolog_ensembl_gene != 
+         "") %>% mutate(gene = convertIDs(hsapiens_homolog_ensembl_gene, "GENEID", 
+         "GENENAME", EnsDb.Hsapiens.v79, "useFirst"))
+ }
> fig1 = get_human(import("f1b.csv"))
> fig1narrow = get_human(import("f1bnarrow.csv"))
> 
> exp1 = import("Experiment1_Data.xlsx")
> exp1$Gene_Name = gsub("\\'", "", exp1$Gene_Name)
> colnames(exp1) = gsub("\\+", "p", colnames(exp1))
> exp2 = import("Experiment2_Data.xlsx")
> exp2$Gene_Name = gsub("\\'", "", exp2$Gene_Name)
> colnames(exp2) = gsub("\\+", "p", colnames(exp2))
> 
> data = inner_join(exp1[, 1:11] %>% filter(Gene_Name != ""), exp2[, 1:11] %>% 
+     filter(Gene_Name != ""), by = "Gene_Name") %>% dselect(Gene_Name, Entrez_Gene_ID = Entrez_Gene_ID.x, 
+     Description = Description.x, d1_nt = nt.x, d1_sh17 = sh17.x, d1_sh16 = sh16.x, 
+     d1_ntp = ntp.x, d1_sh17p = sh17p.x, d1_sh16p = sh16p.x, d2_nt = nt.y, d2_sh17 = sh17.y, 
+     d2_sh16 = sh16.y, d2_ntp = ntp.y, d2_sh17p = sh17p.y, d2_sh16p = sh16p.y)
> 
> counts = data[, 4:15]
> row.names(counts) = data$Gene_Name
> 
> 
> metadata = rbind(data.frame(donor = "donor1", group = colnames(exp1)[3:8]) %>% 
+     mutate(name = paste0("d1_", group)), data.frame(donor = "donor2", group = colnames(exp2)[3:8]) %>% 
+     mutate(name = paste0("d2_", group))) %>% mutate(treat = ifelse(grepl("p$", 
+     group), "plus", "minus")) %>% mutate(type = gsub("p$", "", group)) %>% mutate(type_simple = gsub("[0-9]", 
+     "", type))
> rownames(metadata) = metadata$name
> metadata = metadata[colnames(counts), ]
> metadata
          donor group     name treat type type_simple
d1_nt    donor1    nt    d1_nt minus   nt          nt
d1_sh17  donor1  sh17  d1_sh17 minus sh17          sh
d1_sh16  donor1  sh16  d1_sh16 minus sh16          sh
d1_ntp   donor1   ntp   d1_ntp  plus   nt          nt
d1_sh17p donor1 sh17p d1_sh17p  plus sh17          sh
d1_sh16p donor1 sh16p d1_sh16p  plus sh16          sh
d2_nt    donor2    nt    d2_nt minus   nt          nt
d2_sh17  donor2  sh17  d2_sh17 minus sh17          sh
d2_sh16  donor2  sh16  d2_sh16 minus sh16          sh
d2_ntp   donor2   ntp   d2_ntp  plus   nt          nt
d2_sh17p donor2 sh17p d2_sh17p  plus sh17          sh
d2_sh16p donor2 sh16p d2_sh16p  plus sh16          sh

Distribution of expression values

> counts = counts[rowSums(counts[, 1:6] > 0) > 2 & rowSums(counts[, 6:12] > 0) > 
+     2, ]
> 
> log2counts = log2(counts + 1)
> 
> ggplot(melt(log2counts), aes(x = value, color = variable)) + geom_density() + 
+     theme_bw()

> mds(log2counts, k = 2, condition = metadata$donor) + theme_bw()

Differential analysis with DESeq2

Just using DESeq2 without any further addition to the analysis

Dispersion values

> dds = DESeqDataSetFromMatrix(counts, metadata, design = ~donor + treat + type)
> # rdds = rlog(dds)
> dds = DESeq(dds)
> DESeq2::plotDispEsts(dds)

> sf <- sizeFactors(dds)
> disp <- dispersions(dds)
> dds_group = DESeqDataSetFromMatrix(counts, metadata, design = ~donor + group)
> sizeFactors(dds_group) <- sf
> dispersions(dds_group) <- disp
> dds_group <- nbinomWaldTest(dds_group)

How the gene SET looks like:

> res_sh16_vs_control = results(dds_group, list("groupsh16", "groupnt"))
> suppressWarnings(DEGreport::degPlot(dds_group, res_sh16_vs_control["SET", , 
+     drop = FALSE], n = 1, xs = "treat", group = "type", batch = "donor"))

WT treatment effect

  • nt vs nt+
> dds_nt = dds_group[, c(1, 4, 7, 10)]
> dds_nt$group <- droplevels(dds_nt$group)
> dds_nt = DESeq(dds_nt)
> res_nt_vs_plus = results(dds_nt, list("groupntp", "groupnt"))
> DESeq2::plotMA(res_nt_vs_plus)
> res = res_nt_vs_plus %>% as.data.frame() %>% tibble::rownames_to_column("id") %>% 
+     arrange(padj) %>% tibble::column_to_rownames("id")
> 
> res %>% head(15) %>% knitr::kable()
baseMean log2FoldChange lfcSE stat pvalue padj
SLPI 7662459.19 1.5830902 0.1714466 9.233722 0e+00 0.0000000
CXCL8 1188881.10 1.9018591 0.2117914 8.979869 0e+00 0.0000000
OSR2 99512.27 1.6784183 0.2368076 7.087687 0e+00 0.0000000
SERPINB2 10393885.04 1.1293431 0.1622478 6.960605 0e+00 0.0000000
NCAM1 1113683.63 1.3418642 0.1984219 6.762681 0e+00 0.0000000
PCDHB2 1087238.87 1.3736877 0.2133116 6.439817 0e+00 0.0000001
S100A12 12499122.34 1.4333957 0.2368155 6.052795 0e+00 0.0000014
GP1BB 1293988.87 -1.0443183 0.1739462 -6.003685 0e+00 0.0000017
CD177 1227089.58 1.3694676 0.2359382 5.804349 0e+00 0.0000051
CYBB 2682054.20 1.1214983 0.1979280 5.666193 0e+00 0.0000104
C14orf93 527370.30 1.3032385 0.2360165 5.521810 0e+00 0.0000217
TOPAZ1 178061.31 1.2774197 0.2367806 5.394951 1e-07 0.0000407
AKR1B1 11230807.19 -0.8279917 0.1585309 -5.222903 2e-07 0.0000965
MARCKS 6172795.35 0.9119084 0.1787299 5.102158 3e-07 0.0001708
GNG12 482679.35 1.1956420 0.2367881 5.049417 4e-07 0.0002104
> volcano_density_plot(res[, c("log2FoldChange", "pvalue")], shade.colour = "orange")

> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type", 
+     batch = "donor"))

> deseq2_ntplus_effect = res

sh treatment effect

  • sh16/17 vs sh16/17+
> dds_sh = dds[, c(2:3, 5:6, 8:9, 11:12)]
> dds_sh$type <- droplevels(dds_sh$type)
> design(dds_sh) = ~treat
> dds_sh = DESeq(dds_sh)
> res_sh_vs_plus = results(dds_sh, list("treatplus", "treatminus"))
> DESeq2::plotMA(res_sh_vs_plus)
> res = res_sh_vs_plus %>% as.data.frame() %>% tibble::rownames_to_column("id") %>% 
+     arrange(padj) %>% tibble::column_to_rownames("id")
> 
> res %>% head(15) %>% knitr::kable()
baseMean log2FoldChange lfcSE stat pvalue padj
S100A12 15172358.9 1.1570647 0.1428284 8.101085 0.00e+00 0.0000000
S100A9 158706911.4 1.1945729 0.1477780 8.083562 0.00e+00 0.0000000
NAMPT 34387685.8 0.6742847 0.1179963 5.714458 0.00e+00 0.0000277
AKR1B1 11467653.1 -0.6012801 0.1110036 -5.416763 1.00e-07 0.0000917
SLC29A1 334093.4 -0.4334638 0.0798932 -5.425541 1.00e-07 0.0000917
VASP 27170215.4 0.5375306 0.1015978 5.290772 1.00e-07 0.0001533
SULT1B1 819343.0 0.6853209 0.1377648 4.974571 7.00e-07 0.0007057
DYSF 20479923.9 0.7101058 0.1473223 4.820084 1.40e-06 0.0013550
UPP1 2779164.9 0.6648087 0.1453429 4.574070 4.80e-06 0.0040149
IL1RN 2082274.8 0.6684111 0.1478111 4.522063 6.10e-06 0.0046261
GP1BB 945864.0 -0.6150736 0.1366861 -4.499899 6.80e-06 0.0046688
CYBB 3010218.0 0.6547588 0.1477564 4.431341 9.40e-06 0.0058952
SERPINA1 2126205.1 0.5750953 0.1320826 4.354058 1.34e-05 0.0077655
ACSL1 17003635.6 0.6398561 0.1478444 4.327903 1.51e-05 0.0081225
ANXA3 47538229.4 0.5738806 0.1375088 4.173411 3.00e-05 0.0149562
> volcano_density_plot(res[, c("log2FoldChange", "pvalue")], shade.colour = "orange")

> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type", 
+     batch = "donor"))

> deseq2_shplus_effect = res

Proteins induced by SET in WT but not in SET-KD

The idea is to use the DE genes from nt vs nt+ and remove the ones that changed due to sh6 vs sh16+ or sh17 vs sh17+.

We used the FDR < 0.05 to get the DE and pvalue>0.2 to get the non-changed genes.

> deseq2_sh_dependent = inner_join(deseq2_ntplus_effect %>% tibble::rownames_to_column("id") %>% 
+     filter(padj < 0.05) %>% dselect(id, nt_padj = padj), deseq2_shplus_effect %>% 
+     tibble::rownames_to_column("id") %>% filter(pvalue > 0.1) %>% dselect(id, 
+     sh_pval = padj)) %>% rowwise() %>% mutate(sh_max = sh_pval) %>% mutate(diff = nt_padj - 
+     sh_max) %>% arrange((nt_padj))
> 
> suppressWarnings(DEGreport::degPlot(dds_group, deseq2_ntplus_effect[deseq2_sh_dependent$id, 
+     ], n = 9, xs = "treat", group = "type", batch = "donor"))

Common proteins among comparisons

> deseq2_full_table = right_join(as.data.frame(assay(rlog(dds))) %>% tibble::rownames_to_column("id"), 
+     full_join(as.data.frame(deseq2_ntplus_effect) %>% tibble::rownames_to_column("id") %>% 
+         dselect(id, nt_neg_vs_pos_logFC = log2FoldChange, nt_neg_vs_pos_pval = pvalue, 
+             nt_neg_vs_pos_FDR = padj), as.data.frame(deseq2_shplus_effect) %>% 
+         tibble::rownames_to_column("id") %>% dselect(id, sh_neg_vs_pos_logFC = log2FoldChange, 
+         sh_neg_vs_pos_pval = pvalue, sh_neg_vs_pos_FDR = padj), by = "id"), 
+     by = "id") %>% mutate(nt_stimulus_dependent = ifelse(nt_neg_vs_pos_FDR < 
+     0.1, "Yes", "No"), sh_stimulus_dependent = ifelse(sh_neg_vs_pos_FDR < 0.1, 
+     "Yes", "No"), SET_dependent = ifelse(nt_neg_vs_pos_FDR < 0.1 & sh_neg_vs_pos_pval > 
+     0.1, "Yes", "No"))
> 
> write.table(deseq2_full_table, "deseq2_table.xls", sep = "\t", row.names = FALSE)
> 
> ma = deseq2_full_table %>% dselect(nt_stimulus_dependent, sh_stimulus_dependent, 
+     SET_dependent)
> ma = ma == "Yes"
> ma = ma * 1
> ma[is.na(ma)] = 0
> UpSetR::upset(as.data.frame(ma), sets = c("nt_stimulus_dependent", "sh_stimulus_dependent", 
+     "SET_dependent"))

> DT::datatable(deseq2_full_table %>% filter(SET_dependent == "Yes" | nt_stimulus_dependent == 
+     "Yes"))

Differential analysis with lima-voom as microarray data

> d = model.matrix(~0 + donor + treat, metadata)
> y = normalizeBetweenArrays(log2(counts + 1), method = "quantile")
> 
> limma_ma = vooma(y, plot = FALSE)
> 
> limmaPlot = function(counts, genes, metadata, xs = "time", group = "condition", 
+     batch = NULL) {
+     pp = lapply(genes, function(gene) {
+         dd = data.frame(count = counts[gene, ], time = metadata[, xs])
+         if (is.null(group)) {
+             dd$treatment = "one_group"
+         } else {
+             dd$treatment = metadata[row.names(dd), group]
+         }
+         if (!is.null(batch)) {
+             dd$batch = metadata[row.names(dd), batch]
+             p = ggplot(dd, aes(x = time, y = count, color = batch, shape = treatment))
+         } else {
+             p = ggplot(dd, aes(x = time, y = count, color = treatment, shape = treatment))
+         }
+         p = p + stat_smooth(aes(x = time, y = count, group = treatment, color = treatment), 
+             fill = "grey80") + geom_jitter(size = 1, alpha = 0.7, height = 0, 
+             width = 0.2) + theme_bw(base_size = 7) + ggtitle(gene)
+         if (length(unique(dd$treatment)) == 1) {
+             p = p + scale_color_brewer(guide = FALSE, palette = "Set1") + scale_fill_brewer(guide = FALSE, 
+                 palette = "Set1")
+         }
+         p
+     })
+     n = ceiling(length(pp))
+     do.call(grid.arrange, pp)
+ }

WT treatment effect

> library(limma)
> keep = grepl("nt", row.names(metadata))
> d = model.matrix(~0 + donor + treat, metadata[keep, ])
> y = normalizeBetweenArrays(log2(counts[, keep]), method = "quantile")
> 
> v = voomaByGroup(y, group = metadata$type[keep], d, plot = TRUE)
> 
> f = lmFit(v, d)
> fb = eBayes(f)
> 
> 
> res = topTable(fb, coef = "treatplus", sort.by = "p", number = Inf)
> 
> knitr::kable(head(res, 15))
logFC AveExpr t P.Value adj.P.Val B
CXCL8 2.581916 19.80694 9.767088 0.0000092 0.0344386 3.6667038
SLPI 1.890419 22.79432 9.091814 0.0000157 0.0344386 3.4500397
S100A12 3.169849 23.15430 9.039778 0.0000164 0.0344386 3.3942174
C14orf93 3.086437 18.35416 8.596161 0.0000238 0.0344386 2.7013356
OSR2 3.342374 15.58042 8.542010 0.0000250 0.0344386 1.8329605
CSF3 4.465571 18.46044 8.437917 0.0000273 0.0344386 2.5017388
PCDHB2 1.893239 20.03219 7.839069 0.0000468 0.0492400 2.4330637
KLHL35 3.440221 15.72689 7.673301 0.0000547 0.0492400 2.0309624
CD177 2.529608 19.80707 7.587720 0.0000593 0.0492400 2.1975587
S100A8 3.021722 26.91976 7.488745 0.0000651 0.0492400 2.1333866
S100A9 2.390447 26.73337 7.249860 0.0000821 0.0564570 1.9374803
NCAM1 1.847449 18.76559 7.071941 0.0000980 0.0590235 1.7140009
DEFA3 4.510520 15.14801 6.986526 0.0001068 0.0590235 0.6898536
TOPAZ1 2.491827 17.03690 6.963874 0.0001093 0.0590235 1.3456436
DYSF 1.804080 24.39469 6.709349 0.0001420 0.0715635 1.5057166
> volcano_density_plot(res[, c("logFC", "P.Value")], shade.colour = "orange")

> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type", 
+     batch = "donor"))

> ntplus_effect = res

sh vs nt in treatment

> library(limma)
> keep = grepl("p$", row.names(metadata))
> d = model.matrix(~0 + donor + type_simple, metadata[keep, ])
> y = normalizeBetweenArrays(log2(counts[, keep]), method = "quantile")
> 
> v = voom(y, d, plot = TRUE)
> 
> f = lmFit(v, d)
> fb = eBayes(f)
> 
> res = topTable(fb, coef = "type_simplesh", sort.by = "p", number = Inf)
> 
> knitr::kable(head(res, 15))
logFC AveExpr t P.Value adj.P.Val B
KLHL35 -0.3454307 6.596105 -12.048545 0.0000234 0.1156677 3.2655719
FAM188B -0.2106626 6.562483 -11.492338 0.0000306 0.1156677 2.8167320
SFMBT2 -0.2977199 6.718479 -9.931417 0.0000694 0.1675005 2.3965734
SLPI -0.0710086 7.263293 -9.504896 0.0000886 0.1675005 2.1231709
SNCA 0.0468367 7.207933 7.810040 0.0002603 0.3126708 1.0136515
IFT122 -0.1594848 6.795786 -7.672955 0.0002864 0.3126708 1.0653571
PTGS1 0.0439758 7.284724 7.657808 0.0002895 0.3126708 0.8510756
ZNF568 -0.1765354 6.681563 -7.295638 0.0003756 0.3549800 0.7320557
ENO2 0.0538676 7.247348 6.927878 0.0004949 0.4156864 0.2929790
DEFA3 -0.1436683 6.777069 -6.610478 0.0006339 0.4792400 0.2707045
PTRF 0.0421644 7.235164 6.393764 0.0007548 0.4822257 -0.1661412
RAP1GAP -0.1604823 6.899775 -6.335942 0.0007914 0.4822257 0.0369817
ADAMTS12 0.1465757 6.576018 6.279414 0.0008292 0.4822257 -0.1288461
MROH2A 0.1842238 6.818400 6.122108 0.0009458 0.4917819 -0.0917911
BMP1 -0.2110140 6.880114 -6.085206 0.0009758 0.4917819 -0.1676670
> volcano_density_plot(res[, c("logFC", "P.Value")], shade.colour = "orange")

> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type", 
+     batch = "donor"))

> shplus_ntplus_effect = res

sh16/sh17 treatment effect

> library(limma)
> keep = !grepl("nt", row.names(metadata))
> d = model.matrix(~0 + donor + type + treat, metadata[keep, ])
> y = normalizeBetweenArrays(log2(counts[, keep]), method = "quantile")
> 
> v = voomaByGroup(y, group = metadata$type[keep], d, plot = TRUE)
> 
> f = lmFit(v, d)
> fb = eBayes(f)
> 
> res = topTable(fb, coef = "treatplus", sort.by = "p", number = Inf)
> 
> knitr::kable(head(res, 15))
logFC AveExpr t P.Value adj.P.Val B
RETN 1.5377891 20.66102 23.35461 1.00e-07 0.0006611 7.588722
S100A9 2.4332080 26.43037 21.01202 2.00e-07 0.0006758 6.936341
CYBA 1.0056334 20.53080 17.32453 7.00e-07 0.0016527 6.296766
FCAR 1.3789761 21.01399 16.57269 9.00e-07 0.0016692 6.150770
CYBB 1.2631403 21.18817 15.49117 1.40e-06 0.0020974 5.834846
CD177 2.6267508 19.20314 14.66743 2.00e-06 0.0025165 5.263359
SULT1B1 1.0076530 19.38423 12.61322 5.40e-06 0.0044148 4.563903
ACSL1 1.1690543 23.65905 12.60906 5.40e-06 0.0044148 4.768172
CES1 0.5766228 20.75964 12.49156 5.80e-06 0.0044148 4.637054
ITGAM 0.7883593 22.90557 12.17975 6.80e-06 0.0044148 4.566317
NAMPT 0.7974357 24.81606 12.17556 6.90e-06 0.0044148 4.524472
MCEMP1 1.4549909 19.34002 12.13479 7.00e-06 0.0044148 4.348670
HLA-DQA1 -0.9592994 19.89827 -11.19283 1.19e-05 0.0066671 3.953712
HIP1 0.6582028 23.38038 11.13031 1.23e-05 0.0066671 4.017406
ACPP 0.6758494 19.61586 10.86617 1.44e-05 0.0069528 3.770140
> volcano_density_plot(res[, c("logFC", "P.Value")], shade.colour = "orange")

> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type", 
+     batch = "donor"))

> shplus_effect = res

Proteins induced by SET in WT but not in KD

> sh_dependent = inner_join(ntplus_effect %>% tibble::rownames_to_column("id") %>% 
+     filter(adj.P.Val < 0.1), shplus_effect %>% tibble::rownames_to_column("id") %>% 
+     filter(adj.P.Val > 0.2), by = "id")
> 
> suppressWarnings(DEGreport::degPlot(dds_group, ntplus_effect[sh_dependent$id, 
+     ], n = 8, xs = "treat", group = "type", batch = "donor"))

> # suppressWarnings(limmaPlot(limma_ma$E, sh_dependent$id, metadata, xs =
> # 'treat', group = 'type', batch = 'donor' ))

Common proteins among comparisons

> full_table = full_join(as.data.frame(limma_ma$E) %>% tibble::rownames_to_column("id"), 
+     full_join(as.data.frame(ntplus_effect) %>% tibble::rownames_to_column("id") %>% 
+         dselect(id, nt_neg_vs_pos_logFC = logFC, nt_neg_vs_pos_pval = P.Value, 
+             nt_neg_vs_pos_FDR = adj.P.Val), as.data.frame(shplus_effect) %>% 
+         tibble::rownames_to_column("id") %>% dselect(id, sh_neg_vs_pos_logFC = logFC, 
+         sh_neg_vs_pos_pval = P.Value, sh_neg_vs_pos_FDR = adj.P.Val), by = "id"), 
+     by = "id") %>% full_join(as.data.frame(shplus_ntplus_effect) %>% tibble::rownames_to_column("id") %>% 
+     dselect(id, SET_dependet_in_stimulus_FDR = adj.P.Val), by = "id") %>% mutate(nt_stimulus_dependent = ifelse(nt_neg_vs_pos_FDR < 
+     0.1, "Yes", "No"), sh_stimulus_dependent = ifelse(sh_neg_vs_pos_FDR < 0.1, 
+     "Yes", "No"), SET_dependent = ifelse(nt_neg_vs_pos_FDR < 0.1 & sh_neg_vs_pos_pval > 
+     0.1, "Yes", "No")) %>% left_join(fig1 %>% dselect(id = gene, fig1b = cluster), 
+     by = "id") %>% left_join(fig1narrow %>% dselect(id = gene, fig1bnarrow = cluster), 
+     by = "id")
> 
> write.table(full_table, "limma_table.xls", sep = "\t", row.names = FALSE)
> 
> ma = full_table %>% mutate(fig1 = !is.na(fig1b), fig1narrow = !is.na(fig1bnarrow)) %>% 
+     dselect(nt_stimulus_dependent, sh_stimulus_dependent, SET_dependent, fig1, 
+         fig1narrow)
> ma = ma == "Yes" | ma == TRUE
> ma = ma * 1
> ma[is.na(ma)] = 0
> UpSetR::upset(as.data.frame(ma), sets = colnames(ma))

> export(ma, "limma_commons.xls", "tsv")
> 
> DT::datatable(full_table %>% filter(SET_dependent == "Yes" | nt_stimulus_dependent == 
+     "Yes"))

Normalize with RUVseq

It doesn’t help a lot this time. I guess too few replicates to remove noise from any of them.

> library(RUVSeq)
> ruv = RUVs(counts(dds_group), cIdx = row.names(counts), k = 1, scIdx = matrix(c(1:6, 
+     7:12), nrow = 6))
> mds(ruv$normalizedCounts, k = 2, metadata$group) + ggtitle("1 factor")

> ruv = RUVs(as.matrix(counts), cIdx = row.names(counts), k = 2, scIdx = matrix(c(1:6, 
+     7:12), nrow = 6))
> mds(ruv$normalizedCounts, k = 2, metadata$group) + ggtitle("2 factors")

> ruv = RUVs(as.matrix(counts), cIdx = row.names(counts), k = 3, scIdx = matrix(c(1:6, 
+     7:12), nrow = 6))
> mds(ruv$normalizedCounts, k = 2, metadata$group) + ggtitle("3 factors")

Other code used for exploration

sh16 treatment effect

  • sh16 vs sh16+
> res_sh16_vs_plus = results(dds_group, list("groupsh16p", "groupsh16"))
> DESeq2::plotMA(res_sh16_vs_plus)
> res = res_sh16_vs_plus %>% as.data.frame() %>% tibble::rownames_to_column("id") %>% 
+     arrange(padj) %>% tibble::column_to_rownames("id")
> 
> res %>% head(15) %>% knitr::kable()
baseMean log2FoldChange lfcSE stat pvalue padj
ACSL1 16567561.0 1.2779508 0.1643207 7.777172 0e+00 0.0000000
CYBB 2900830.1 1.1087726 0.1527041 7.260924 0e+00 0.0000000
DHRS9 825946.3 1.1875589 0.1797906 6.605232 0e+00 0.0000001
DYSF 20773280.4 1.3021073 0.2035469 6.397086 0e+00 0.0000003
UTRN 17584830.4 0.8748320 0.1393133 6.279600 0e+00 0.0000005
EFHD2 19151142.9 0.4732838 0.0791501 5.979574 0e+00 0.0000028
DOK3 4842773.4 0.5519350 0.0941608 5.861625 0e+00 0.0000050
AKAP13 9230549.6 0.4194689 0.0736838 5.692827 0e+00 0.0000105
RBPJ 1246748.9 0.5915706 0.1039267 5.692191 0e+00 0.0000105
SLK 14105526.4 0.3047064 0.0550687 5.533204 0e+00 0.0000238
GRAMD1A 249876.0 0.7319624 0.1340475 5.460473 0e+00 0.0000299
HIP1 12592345.6 0.7236909 0.1324214 5.465058 0e+00 0.0000299
HCK 6270652.0 0.7460440 0.1423116 5.242327 2e-07 0.0000922
AKR1B1 11388704.5 -0.6597371 0.1266926 -5.207387 2e-07 0.0001034
SMPDL3A 349707.7 1.0411864 0.2029895 5.129263 3e-07 0.0001466
> volcano_density_plot(res[, c("log2FoldChange", "pvalue")], shade.colour = "orange")

> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type", 
+     batch = "donor"))

> deseq2_sh16plus_effect = res

sh17 treatment effect

  • sh17 vs sh17+
> res_sh17_vs_plus = results(dds_group, list("groupsh17p", "groupsh17"))
> DESeq2::plotMA(res_sh17_vs_plus)
> res = res_sh17_vs_plus %>% as.data.frame() %>% tibble::rownames_to_column("id") %>% 
+     arrange(padj) %>% tibble::column_to_rownames("id")
> 
> res %>% head(15) %>% knitr::kable()
baseMean log2FoldChange lfcSE stat pvalue padj
CYBB 2900830.1 1.1552149 0.1527042 7.565051 0.00e+00 0.0000000
ACSL1 16567561.0 0.9316196 0.1643207 5.669519 0.00e+00 0.0000229
DYSF 20773280.4 1.1549701 0.2035469 5.674221 0.00e+00 0.0000229
EFHD2 19151142.9 0.4533775 0.0791500 5.728076 0.00e+00 0.0000229
HEMGN 266539.7 -0.7048192 0.1248420 -5.645692 0.00e+00 0.0000229
AKR1B1 11388704.5 -0.6950905 0.1266924 -5.486443 0.00e+00 0.0000477
MMP8 374974.2 0.9127065 0.1689500 5.402227 1.00e-07 0.0000656
UTRN 17584830.4 0.7242910 0.1393133 5.199009 2.00e-07 0.0001746
HCK 6270652.0 0.7336007 0.1423116 5.154891 3.00e-07 0.0001966
CYBA 1776465.4 0.8769022 0.1735016 5.054146 4.00e-07 0.0003015
SEMA6B 302314.0 0.5819162 0.1243450 4.679853 2.90e-06 0.0018198
CD109 1324552.9 -0.6074466 0.1333961 -4.553707 5.30e-06 0.0028272
TYROBP 2200631.4 0.5676309 0.1244887 4.559696 5.10e-06 0.0028272
CEP170 7763560.4 0.2428626 0.0552063 4.399184 1.09e-05 0.0054120
DHRS9 825946.3 0.7827607 0.1797907 4.353733 1.34e-05 0.0054898
> volcano_density_plot(res[, c("log2FoldChange", "pvalue")], shade.colour = "orange")

> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type", 
+     batch = "donor"))

> deseq2_sh17plus_effect = res